
#### Preliminaries ####

R.version$version.string

# clear workspace
rm(list=ls())

# load packages
library(here)
library(tidyverse)
library(rworldmap) # maps
library(magrittr)
library(reshape2)
library(ggpubr)
library(leaflet)
library(plotly)
library(gapminder)
library(countrycode)
library(ggrepel)
library(ggthemes)
library(modelsummary)

ggplot_theme <- theme(axis.text=element_text(size=16),
                      axis.title=element_text(size=16,face="bold"), 
                      title =element_text(size=16, face='bold'))

legend_size_theme <-   theme(legend.key.size = unit(1, 'cm'), #change legend key size
                             legend.key.height = unit(1, 'cm'), #change legend key height
                             legend.key.width = unit(1, 'cm'), #change legend key width
                             legend.title = element_text(size=16), #change legend title font size
                             legend.text = element_text(size=16)) #change legend text font size 



#---------------------------------------------------#
#--------------------load data ---------------------#
#---------------------------------------------------#

# 1. Load V-Dem data (version 13)

# set you working directory here, when not using the Rproject#

vdem_full_v13 <- readRDS("data/vdem/V-Dem-CY-Full+Others-v13.rds")

vdem_subset <- vdem_full_v13 %>%
  dplyr::select(country_name, year, country_id, starts_with("v2x_polyarchy"),
                starts_with("v2cafres"), starts_with("v2cafexch"), 
                starts_with("v2cainsaut"), starts_with("v2casurv"), 
                starts_with("v2clacfree"))

vdem <- vdem_full_v13


#---------------------------------------------------#
#---- Load Function for getting Episodes of X -----#
#---------------------------------------------------#

# load the relevant functions into the environment

source("R/get_episode_function.R")
source("R/get_episode_wo_CI_function.R")
source("R/plot_all_episode.R")
source("R/plot_episodes_function.R")

#---------------------------------------------------#
#---------Episodes of Academic Freedom Increase-----#
#---------------------------------------------------#

episode_data <- get_episode(data = vdem)
episode_data_wo_CI <- get_episode_wo_CI(data = vdem)

saveRDS(episode_data, file.path("data", "episode_data.rds"))
saveRDS(episode_data_wo_CI, file.path("data","episode_data_wo_CI.rds"))


episodes_decline <- episode_data %>%
  group_by(country_name) %>%
  mutate(lag_v2xca_academ = dplyr::lag(v2xca_academ)) %>%
  filter(decline_episode==1) %>%
  group_by(decline_episode_id) %>%
  dplyr:: summarize(country_name = first (country_name), 
                    start_year = first(year), 
                    end_year = last(year), 
                    VAR_before = first(lag_v2xca_academ),
                    VAR_end = last(v2xca_academ), 
                    decline = VAR_end- VAR_before)

episodes_decline$start_year <- as.character(episodes_decline$start_year)
episodes_decline$end_year <- as.character(episodes_decline$end_year)

datasummary_df(episodes_decline, 
               output = "tables/Table_D1.tex", 
               title = "List of Decline Episodes in Academic Freedom", 
               longtable=TRUE)

episodes_decline$start_year <- as.numeric(episodes_decline$start_year)
episodes_decline$end_year <- as.numeric(episodes_decline$end_year)

episodes_increase <- episode_data %>%
  group_by(country_name) %>%
  mutate(lag_v2xca_academ = dplyr::lag(v2xca_academ)) %>%
  filter(increase_episode==1) %>%
  group_by(increase_episode_id) %>%
  dplyr:: summarize(country_name = first (country_name), 
                    start_year = first(year), 
                    end_year = last(year), 
                    VAR_before = first(lag_v2xca_academ),
                    VAR_end = last(v2xca_academ), 
                    increase =VAR_end- VAR_before)  

episodes_increase$start_year <- as.character(episodes_increase$start_year)
episodes_increase$end_year <- as.character(episodes_increase$end_year)

datasummary_df(episodes_increase, 
               output = "tables/Table_C1.tex", 
               title = "List of Growth Episodes in Academic Freedom", 
               longtable=TRUE)

episodes_increase$start_year <- as.numeric(episodes_increase$start_year)
episodes_increase$end_year <- as.numeric(episodes_increase$end_year)


episodes_decline_wo_CI <- episode_data_wo_CI %>%
  group_by(country_name) %>%
  mutate(lag_v2xca_academ = dplyr::lag(v2xca_academ)) %>%
  filter(decline_episode==1) %>%
  group_by(decline_episode_id) %>%
  dplyr:: summarize(country_name = first (country_name), 
                    start_year = first(year), 
                    end_year = last(year), 
                    VAR_before = first(lag_v2xca_academ),
                    VAR_end = last(v2xca_academ), 
                    decline = VAR_end-VAR_before)  

episodes_decline_wo_CI %>% group_by(country_name) %>% summarise(country_name = n_distinct(country_name))

episodes_decline_wo_CI$start_year <- as.character(episodes_decline_wo_CI$start_year)
episodes_decline_wo_CI$end_year <- as.character(episodes_decline_wo_CI$end_year)

datasummary_df(episodes_decline_wo_CI, 
               output = "tables/Table_B1.tex", 
               title = "List of Decline Episodes in Academic Freedom", 
               longtable=TRUE)

episodes_decline_wo_CI$start_year <- as.numeric(episodes_decline_wo_CI$start_year)
episodes_decline_wo_CI$end_year <- as.numeric(episodes_decline_wo_CI$end_year)

episodes_increase_wo_CI <- episode_data_wo_CI %>%
  group_by(country_name) %>%
  mutate(lag_v2xca_academ = dplyr::lag(v2xca_academ)) %>%
  filter(increase_episode==1) %>%
  group_by(increase_episode_id) %>%
  dplyr:: summarize(country_name = first (country_name), 
                    start_year = first(year), 
                    end_year = last(year), 
                    VAR_before = first(lag_v2xca_academ),
                    VAR_end = last(v2xca_academ), 
                    increase = VAR_end- VAR_before) 

episodes_increase_wo_CI$start_year <- as.character(episodes_increase_wo_CI$start_year)
episodes_increase_wo_CI$end_year <- as.character(episodes_increase_wo_CI$end_year)

datasummary_df(episodes_increase_wo_CI, 
               output = "tables/Table_A1.tex", 
               title = "List of Growth Episodes in Academic Freedom", 
               longtable=TRUE)

episodes_increase_wo_CI$start_year <- as.numeric(episodes_increase_wo_CI$start_year)
episodes_increase_wo_CI$end_year <- as.numeric(episodes_increase_wo_CI$end_year)

episodes_increase_wo_CI %>% group_by(country_name) %>% summarise(country_name = n_distinct(country_name))

plot_all(data.df = episode_data)
ggsave("figures/Overview_Episodes_w_CI.png", height = 20, width = 30, units = "cm", dpi = 800)

#### AFI descriptives ####

vdem %>%
  drop_na(v2xca_academ) %>%
  summarize(country_name = n_distinct(country_text_id))

vdem %>%
  drop_na(v2xca_academ) %>%
  summarize(n = n())

#### Figure 1: World Averages ####

vdem_fig_world <- vdem_full_v13 %>%
  dplyr::select(country_name, year, e_regionpol_6C, v2xca_academ, v2xca_academ_codehigh, v2xca_academ_codelow) %>%
  filter(year >= 1900) %>%
  group_by(year) %>%
  summarize(v2xca_academ_region_mean = mean(v2xca_academ, na.rm = T), 
            v2xca_academ_region_codehigh = mean(v2xca_academ_codehigh, na.rm = T),
            v2xca_academ_region_codelow = mean(v2xca_academ_codelow, na.rm = T)) 


p2 <- ggplot(vdem_fig_world, aes(x =year, y = v2xca_academ_region_mean)) +
  geom_ribbon(aes(ymin=v2xca_academ_region_codelow, ymax= v2xca_academ_region_codehigh), alpha = 0.3) +
  geom_line(aes(), size =1.05) +
  theme_pubclean() + 
  ylim(0,1)+
  scale_x_continuous(breaks = seq(round(min(vdem_subset$year) / 10) * 10, round(max(vdem_subset$year) / 10) * 10, 10)) +
  labs(y = "Academic Freedom Index ",
       color = "", x = "Year",
       size = "",
       fill = "",
       linetype = "", 
       shape = "") +
  ggplot_theme 

p1 <- plot_all(data.df = episode_data_wo_CI) + 
  ggplot_theme +
  legend_size_theme


plot1 <- cowplot::plot_grid(p1, p2,
                            ncol=1,
                            labels="AUTO",
                            align = "v",
                            rel_heights = c(1.1,1.1))

ggsave(plot = plot1, "figures/Overview_Episodes.png", height = 20, width = 25, units = "cm", dpi = 800)


################################################################################
########################## FIGURE 2 ############################################
################################################################################

#### Plot AFI decreases ####

episode_data_wo_CI <- episode_data_wo_CI %>%
  group_by(country_name) %>%
  mutate(lag_v2xca_academ = dplyr::lag(v2xca_academ)) %>%
  ungroup() %>%
  dplyr::select(country_id, year, starts_with("increase_"), starts_with("decline"), lag_v2xca_academ)

vdem_full_v13 <- vdem_full_v13 %>%
  left_join(episode_data_wo_CI, by = c("country_id", "year"))

vdem_subset <- vdem_full_v13 %>%
  dplyr::select(country_name, country_id, year, country_text_id, starts_with("v2xca_academ"), 
                starts_with("increase_"), starts_with("decline"), lag_v2xca_academ, v2cauni, starts_with("gap"), v2x_regime, 
                codingstart, codingend) %>%
  filter(year >=1900) %>%
  
  # prepare dataset for plotting by constructing a full.df with gaps and NAs for the gaps. 
  
  dplyr::arrange(country_text_id, year) %>%
  dplyr::group_by(country_id) %>%
  # make codingstart 1900 or first year thereafter
  dplyr::mutate(codingstart2 = min(hablar::s(ifelse(!is.na(v2x_regime), year, NA))),
                # tag original sample for later use
                origsample = 1) %>%
  # we need to deal with gaps in v-dem coding
  # this balances the dataset
  plm::make.pconsecutive(balanced = TRUE, index = c("country_id", "year")) %>%
  dplyr::group_by(country_id) %>%
  # this fills missing variables we need that are constant within countries
  tidyr::fill(c(country_text_id, country_name, codingend, gapstart1, gapend1, gapstart2, gapend2,
                gapstart3, gapend3)) %>%
  tidyr::fill(c(country_text_id, country_name,codingend, gapstart1, gapend1, gapstart2, gapend2,
                gapstart3, gapend3), .direction = "up")  %>%
  # here we need to recode the gaps as only during the period prior to and during the gap (for our "uncertain" variables)
  dplyr::mutate(gapstart = ifelse(year <= gapend1, gapstart1, NA),
                gapend = ifelse(year <= gapend1, gapend1, NA),
                gapstart = ifelse(!is.na(gapend2) & year > gapend1 & year <= gapend2, gapstart2, gapstart),
                gapend = ifelse(!is.na(gapend2) & year > gapend1 & year <= gapend2, gapend2, gapend),
                gapstart = ifelse(!is.na(gapend3) & year > gapend2 & year <= gapend3, gapstart3, gapstart),
                gapend = ifelse(!is.na(gapend3) & year > gapend2 & year <= gapend3, gapend3, gapend))

# plotting data #

f1 <- ggplot() +
  geom_line(subset(vdem_subset),
            mapping = aes(x = year, y = v2xca_academ, group = country_text_id, color = as.factor(decline_episode)), size = 0.2, alpha = 0.2) +
  geom_line(subset(vdem_subset, decline_episode==1), 
            mapping = aes(x = year, y = v2xca_academ, group = decline_episode_id, color = as.factor(decline_episode)), size = 0.5) +
  theme_pubr() + 
  scale_color_manual(values = c("#d3d3d3", "#920050", "#d3d3d3"), labels = c("No Episode", "Episode", "NA")) +
  scale_x_continuous(breaks = seq(round(min(vdem_subset$year) / 10) * 10, round(max(vdem_subset$year) / 10) * 10, 10)) +
  labs(y = "Academic Freedom Index",
       color = "", x = "Year",
       size = "",
       shape = "",
       title = "",
       subtitle = "",
       caption = "") +
  ggplot_theme

f2 <- ggplot() +
  geom_line(subset(vdem_subset),
            mapping = aes(x = year, y = v2xca_academ, group = country_text_id, color = as.factor(increase_episode)), size = 0.2, alpha = 0.2) +
  geom_line(subset(vdem_subset, increase_episode==1), 
            mapping = aes(x = year, y = v2xca_academ, group = increase_episode_id, color = as.factor(increase_episode)), size = 0.5) +
  theme_pubr() + 
  scale_color_manual(values = c("#d3d3d3", "#FFB81C", "#d3d3d3"), labels = c("No Episode", "Episode", "NA")) +
  scale_x_continuous(breaks = seq(round(min(vdem_subset$year) / 10) * 10, round(max(vdem_subset$year) / 10) * 10, 10)) +
  labs(y = "Academic Freedom Index",
       color = "", x = "Year",
       size = "",
       shape = "",
       title = "",
       subtitle = "",
       caption = "") +
  ggplot_theme

vdem_subset_sum <- vdem_subset %>%
  group_by(year) %>%
  summarize(number_of_countries = sum(v2cauni, na.rm = T), 
            number_of_decliners = sum(decline_episode, na.rm = T), 
            number_of_increasers = sum(increase_episode, na.rm = T)) %>%
  mutate(number_of_countries = number_of_countries - number_of_decliners - number_of_increasers) %>%
  pivot_longer(!year, names_to = "type_country_nr", values_to = "count")

f3 <- ggplot(vdem_subset_sum, aes(x = year, y= count, color = type_country_nr, fill = type_country_nr)) +
  geom_bar(stat="identity", position="stack", width = 0.7) +
  theme_pubr() + 
  scale_color_manual(values = c("#d3d3d3", "#920050", "#FFB81C"),  labels = c("No. of Countries", "No. of Decliners", "No. of Increasers")) +
  scale_fill_manual(values = c("#d3d3d3", "#920050", "#FFB81C"), labels = c("No. of Countries", "No. of Decliners", "No. of Increasers")) +
  scale_x_continuous(breaks = seq(round(min(vdem_subset_sum$year) / 10) * 10, round(max(vdem_subset_sum$year) / 10) * 10, 10)) +
  labs(y = "Countries in Episode",
       color = "", x = "Year",
       size = "",
       fill = "",
       shape = "",
       title = "",
       subtitle = "",
       caption = "") +
  ggplot_theme +
  legend_size_theme

decline_increase_overview <- cowplot::plot_grid(f1+  theme(legend.position = "none"),f2 +  theme(legend.position = "none"), f3,
                                                ncol=1,
                                                align = "v",
                                                labels="AUTO",
                                                rel_heights = c(1.1,1.1,1))

#---------------------------------------------------#

ggsave("figures/decline_increase_overview.pdf", plot = decline_increase_overview, height = 20, width = 20)
ggsave("figures/decline_increase_overview.png", plot = decline_increase_overview, height = 20, width = 20, dpi = 600)

################################################################################
########################## Figure 3  ###########################################
################################################################################

#### decline cases as examples #####

episode_data_wo_CI <- get_episode_wo_CI(data = vdem)

#### Growth Episodes ####

uruguay <- plot_episodes(country = "Uruguay", years = c(1970, 2000)) +ggplot_theme + legend_size_theme
mongolia <- plot_episodes(country = "Mongolia", years = c(1970, 2000))+ggplot_theme + legend_size_theme
czechia <- plot_episodes(country = "Czechia", years = c(1970, 2000))+ggplot_theme + legend_size_theme
south_Korea <- plot_episodes(country = "South Korea", years = c(1970, 2000))+ggplot_theme + legend_size_theme
egypt <- plot_episodes(country = "Egypt", years = c(1970, 2000))+ggplot_theme + legend_size_theme
taiwan <- plot_episodes(country = "Taiwan", years = c(1970, 2000))+ggplot_theme + legend_size_theme


example_countries_growth <- cowplot::plot_grid(uruguay, mongolia, czechia, south_Korea, egypt, taiwan,
                                               ncol=2,
                                               align = "v",
                                               rel_heights = c(1.1,1.1,1.1))

ggsave("figures/example_countries_growth.pdf", plot = example_countries_growth, height =30, width = 40, units = "cm")
ggsave("figures/example_countries_growth.png", plot = example_countries_growth, height =30, width = 40, units = "cm")

################################################################################
########################## FIGURE 4 ############################################
################################################################################

vdem_subset_waves <- vdem_subset_sum %>%
  pivot_wider(id_cols = year, names_from = type_country_nr, values_from = count) %>%
  dplyr::mutate(net_change = number_of_increasers - number_of_decliners, 
                roll_change = zoo::rollmean(net_change, 5, align = "center", na.pad = TRUE))

ggplot(vdem_subset_waves, aes(x = year, y= net_change)) +
  geom_bar(stat="identity", position="stack", width = 0.7) +
  geom_line(aes(y = roll_change), color = "#920050", size = 1.05) +
  theme_pubr() + 
  scale_x_continuous(breaks = seq(round(min(vdem_subset_sum$year) / 10) * 10, round(max(vdem_subset_sum$year) / 10) * 10, 10)) +
  labs(y = "Number of Growth Episodes -\nNumber of Decline Episodes",
       color = "", x = "Year",
       size = "",
       fill = "",
       shape = "",
       title = "",
       subtitle = "",
       caption = "") +
  theme(axis.text=element_text(size=13),
        axis.title=element_text(size=16,face="bold"))

ggsave("figures/net_change.pdf",height = 10, width = 20, units = "cm", dpi = 800)
ggsave("figures/net_change.png",height = 10, width = 20, units = "cm", dpi = 800)


################################################################################
########################## Text Info ###########################################
################################################################################

#### average duration of growth episodes ####
episodes_increase_wo_CI <- episodes_increase_wo_CI %>%
  mutate(duration = (end_year - start_year) + 1) 

episodes_increase_wo_CI %>%
  summarize(mean = mean(duration), 
            st = sd(duration),
            min = min(duration), 
            max =max(duration))

episodes_increase_wo_CI %>%
  summarize(mean = mean(increase), 
            st = sd(increase),
            min = min(increase), 
            max =max(increase))

#### average duration of decline episodes ####

episodes_decline_wo_CI %>% group_by(country_name) %>% summarise(country_name = n_distinct(country_name))

episodes_decline_wo_CI <- episodes_decline_wo_CI %>%
  mutate(duration = (end_year - start_year) + 1) 

episodes_decline_wo_CI %>%
  summarize(mean = mean(duration), 
            st = sd(duration),
            min = min(duration), 
            max =max(duration))

episodes_decline_wo_CI %>%
  summarize(mean = mean(decline), 
            st = sd(decline),
            min = min(decline), 
            max =max(decline))

episodes_decline_wo_CI %>%
  filter(start_year >=2003 | end_year >=2003) %>%
  summarize(n = n_distinct(country_name))

episodes_decline_wo_CI %>%
  filter(start_year >=2003) %>%
  summarize(mean = mean(duration), 
            st = sd(duration),
            min = min(duration), 
            max =max(duration))

episodes_decline_wo_CI %>%
  filter(start_year <2003) %>%
  summarize(mean = mean(duration), 
            st = sd(duration),
            min = min(duration), 
            max =max(duration))


################################################################################
########################## Figure 5  ###########################################
################################################################################

#### decline cases as examples #####

usa <- plot_episodes(country = "United States of America", years = c(1995, 2022)) +ggplot_theme + legend_size_theme
russia <- plot_episodes(country = "Russia", years = c(1995, 2022))+ggplot_theme + legend_size_theme
hongkong <- plot_episodes(country = "Hong Kong", years = c(1995, 2022))+ggplot_theme + legend_size_theme
brazil <- plot_episodes(country = "Brazil", years = c(1995, 2022))+ggplot_theme + legend_size_theme
india <- plot_episodes(country = "India", years = c(1995, 2022))+ggplot_theme + legend_size_theme
hungary <- plot_episodes(country = "Hungary", years = c(1995, 2022))+ggplot_theme + legend_size_theme


example_countries <- cowplot::plot_grid(usa, russia, hongkong, brazil, india, hungary,
                                        ncol=2,
                                        align = "v",
                                        rel_heights = c(1.1,1.1,1.1))

ggsave("figures/example_countries_decliners.pdf", plot = example_countries, height =30, width = 40, units = "cm")
ggsave("figures/example_countries_decliners.png", plot = example_countries, height =30, width = 40, units = "cm")



################################################################################
########################## Table 1  ############################################
################################################################################
#---------------------------------------------------#
#### Table 1: Comparing Decline Onset across Regime Types ####

table_1_data <- vdem_full_v13 %>%
  dplyr::select(country_name, country_id, year, country_text_id, starts_with("v2xca_academ"), 
                starts_with("increase_"), starts_with("decline"), lag_v2xca_academ, v2cauni, 
                starts_with("gap"), v2x_regime, codingstart, codingend) %>%
  mutate(v2x_regime_lag = dplyr::lag(v2x_regime)) %>%
  filter(year >=1900) %>%
  mutate(waves_helper = ifelse(year %in% c(1928:1943), "First Wave", 
                               ifelse(year %in% c(1963:1974), "Second Wave", 
                                      ifelse(year %in% c(2012:2021), "Third Wave", NA)))) %>%
  group_by(country_id) %>%
  fill(v2x_regime_lag, .direction = "up") %>%
  group_by(decline_episode_id) %>%
  mutate(first_v2x_regime = first(v2x_regime_lag)) %>%
  filter(waves_helper %in% c("First Wave", "Second Wave", "Third Wave")) %>%
  filter(!is.na(decline_episode_id)) %>%
  group_by(decline_episode_id) %>%
  summarize(waves_helper = first(waves_helper), 
            first_v2x_regime = first(first_v2x_regime)) %>%
  mutate(first_v2x_regime = ifelse(first_v2x_regime >=2, "Democracy", "Autocracy")) %>%
  group_by(waves_helper, first_v2x_regime) %>%
  count()  
